Nebula 1

home *** CD-ROM | disk | FTP | other *** search

/ Nebula 1 / Nebula One.iso / Internet / WWW / Perl_WWW_Utilities / MHonArc / lib / mhtxtsetext.pl < prev next >

Wrap

Perl Script | 1995-03-03 | 4.8 KB | 148 lines

##---------------------------------------------------------------------------##
##  Library to convert text/setext to HTML.  Adapted for use in MHonArc
##  by ehood@convex.com, Sept 1994.
##
##  Filter routine can be registered with the following:
##
##      <MIMEFILTERS>
##      text/setext:m2h_text_setext'filter:mhtxtsetext.pl
##      text/x-setext:m2h_text_setext'filter:mhtxtsetext.pl
##      </MIMEFILTERS>
##---------------------------------------------------------------------------##
# setext -> HTML converter
#
# $Id: setext.pl,v 2.8 1994/06/23 05:44:15 sanders Exp $
#
# Tony Sanders <sanders@earth.com>, June 1993
#
# Status of typotags:
#   header-tt     passed untouched (XXX: use Subject: in next release)
#   title-tt      <H1>...</H1> (and <TITLE> if needed)
#   subhead-tt    <H2>...</H2> (and <TITLE> if needed)
#   indent-tt     reflows paragraphs
#
#   bold-tt       <B>...</B>
#   italic-tt     <I>...</I>
#   underline-tt  <I>...</I>
#   hot-tt        <A HREF="...">...</A> (see also href-tt)
#
#   quote-tt      <BLOCKQUOTE>...</BLOCKQUOTE>
#   NIY bullet-tt <UL>...</UL>
#
#   twobuck-tt    ignored
#   suppress-tt   suppressed in output
#   twodot-tt     ignored
#
# Additional typotags supported for HTML:
#   href-tt       .. _text HREF
#   isindex-tt    .. <isindex>
#
# setext'html  -- converts setext (.etx files) to HTML
# setext'title -- utility routine to convert setext titles and subheads to HTML
#
# TODO:XXX
#   I need to figure out how to allow HTML markup in the text while at the
#   same time suppressing "unintentional" markup.  For now < & > are HTML'ized.
# Define the translations supported
# $trans{'text/setext'} = "text/html:setext'html";

package m2h_text_setext;

# Parser states.  The active state is kept in $curstate and is changed
# only through to_fmt/to_pre/to_quote, which emit the tags needed to
# leave the old state and enter the new one.
$FMT   = 0;    # in free flow text (normal HTML mode)
$PRE   = 1;    # in preformatted text <PRE>...</PRE>
$QUOTE = 2;    # in blockquote <BLOCKQUOTE>...</BLOCKQUOTE>

##---------------------------------------------------------------------------
##  filter -- convert a setext message body to HTML.
##
##  Arguments:
##      $header   first argument; not used by this routine
##      *fields   typeglob, second argument; not used by this routine
##      *body     typeglob whose scalar slot ($body) holds the setext text
##
##  Returns:
##      A one-element list containing the generated HTML.
##
##  The conversion runs in two passes over the lines of $body:
##    1. record "href" definitions, neutralise <isindex> lines and rewrite
##       title/subhead lines into ".. <Hn>text</Hn>" pseudo directives;
##    2. emit HTML line by line, switching between free text, <PRE> and
##       <BLOCKQUOTE><PRE> as the line prefixes dictate.
##
##  All working variables are local()ized so that nothing (link targets,
##  blank-line folding, output buffer) leaks from one call into the next.
##  They are dynamically scoped on purpose: the helper routines below read
##  and write $ret, $curstate and @data.
##
##  NOTE(review): bytes \376 and \377 are used as stand-ins for "<" and ">"
##  until htmlize() runs, so the same bytes occurring in the input come out
##  as literal "<" / ">".  Link targets are also placed inside HREF="..."
##  without escaping a double quote.  Both matter if the body is untrusted.
##
sub filter {
    local($header, *fields, *body) = @_;
    local(@data) = split(/\n/, $body);
    local($ret, $curstate, $fold, $i, $url, $label, %href);
    local($_);                  # do not clobber the caller's $_

    $ret = '';

    # first pass, process <HEAD> items and hypertext link information
    for ($i = 0; $i <= $#data; $i++) {
        $_ = $data[$i];         # $_ is default for m//

        # <ISINDEX> must be inside <HEAD>...</HEAD>
        /^\.\.\s+<isindex>/i && do {
            $data[$i] = "..";
            next;
        };

        # locate HREF's: .. _href URL
        /^\.\.\s+_([^\s]*)\s+(.*)\s*/ && do {
            $href{$1} = $2;
            next;
        };

        # first title-tt or subhead-tt gets <TITLE>...</TITLE>
        # &title also adds the <H#>...</H#> to the appropriate line
        /^===/ && do { &title("H1", $i); next; };
        /^---/ && do { &title("H2", $i); next; };
    }

    # second pass, handle remaining typotags
    $curstate = $FMT;
    foreach (@data) {
        # process title information
        if (/^\.\.\s+(<H.>)(.*)(<\/H.>)/i) {
            # Copy the captures before calling anything else.
            local($open, $text, $close) = ($1, $2, $3);
            &to_fmt;
            $ret .= $open . &htmlize($text) . $close . "\n";
            next;
        }

        # every other ".." line is a directive and produces no output
        next if /^\.\./;

        # handle line breaks: a run of blank lines in free text yields
        # a single <P>
        if ($curstate == $FMT && /^\s*$/) {
            $ret .= "<P>\n" unless $fold++;
            next;
        }
        $fold = 0;

        # state transitions
        # NOTE(review): setext's indent-tt is conventionally two leading
        # spaces; the patterns below test for one.  Confirm the whitespace
        # in these two patterns against a pristine copy of the file.
        if    (/^>\s/)   { &to_quote; }
        elsif (/^ [^ ]/) { &to_fmt; }
        else             { &to_pre; }

        s/^>\s*//;              # fix quote-tt
        s/^ ([^ ])/$1/;         # fix indent-tt

        # bold-tt
        s#\*\*([^\*]*)\*\*#\376B\377$1\376/B\377#;

        # italic-tt
        s#~([^~]*)~#\376I\377$1\376/I\377#;

        # hot-tt: word_ becomes a link when an href was defined for it,
        # otherwise it is rendered in italics
        s#\b([^\s]*)_\b#
            $url = $href{$1};
            ($label = $1) =~ s,_, ,g;
            $url ? qq'\376A HREF="$url"\377$label\376/A\377'
                 : "\376I\377$label\376/I\377";
        #e;

        # underline-tt
        s#_([^\s]*)_#
            ($label = $1) =~ s,_, ,g;
            "\376I\377$label\376/I\377";
        #e;

        $ret .= &htmlize($_) . "\n";
    }

    # close whatever block is still open
    &to_fmt;
    ($ret);
}

##---------------------------------------------------------------------------
##  to_fmt -- switch the output to free flow text, closing an open <PRE>
##  or <BLOCKQUOTE><PRE> first.  No-op when already in that state.
##
sub to_fmt {
    return if $curstate == $FMT;
    $ret .= "</PRE>\n"              if $curstate == $PRE;
    $ret .= "</PRE></BLOCKQUOTE>\n" if $curstate == $QUOTE;    #XXX
    $curstate = $FMT;
}

##---------------------------------------------------------------------------
##  to_pre -- switch the output to preformatted text, closing an open
##  blockquote first.  No-op when already in that state.
##
sub to_pre {
    return if $curstate == $PRE;
    $ret .= "<PRE>\n"                    if $curstate == $FMT;
    $ret .= "</PRE></BLOCKQUOTE><PRE>\n" if $curstate == $QUOTE;    #XXX
    $curstate = $PRE;
}

##---------------------------------------------------------------------------
##  to_quote -- switch the output to quoted text, which is emitted as
##  <BLOCKQUOTE><PRE>.  No-op when already in that state.
##
sub to_quote {
    return if $curstate == $QUOTE;
    $ret .= "<BLOCKQUOTE><PRE>\n"       if $curstate == $FMT;    #XXX
    $ret .= "</PRE><BLOCKQUOTE><PRE>\n" if $curstate == $PRE;    #XXX
    $curstate = $QUOTE;
}

##---------------------------------------------------------------------------
##  htmlize -- return the argument with "&", "<" and ">" replaced by
##  numeric character references, and with the \376 / \377 placeholder
##  bytes turned into real "<" / ">".
##
##  "&" is handled first so the references produced for "<" and ">" are
##  not escaped a second time; the placeholders are converted last so the
##  tags they stand for survive the escaping.
##
sub htmlize {
    local($_) = @_;
    s/\&/\&\#38\;/g;
    s/\</\&\#60\;/g;
    s/\>/\&\#62\;/g;
    s/\376/</g;
    s/\377/>/g;                 # convert back
    $_;
}

##---------------------------------------------------------------------------
##  title -- handle a title/subhead underline found at $data[$i].
##
##  Arguments:
##      $head   heading element name to use ("H1" or "H2" in this file)
##      $i      index into @data of the underline line
##
##  The underline is replaced by ".." so it produces no output, and the
##  line above it is rewritten as ".. <$head>text</$head>" for the second
##  pass of filter() to pick up.  @data is the caller's localized array.
##
##  When the underline is the very first line there is no line above it;
##  the routine then stops after suppressing the underline instead of
##  letting the index wrap to -1 and rewriting the last line of the body.
##
sub title {
    local($head, $i) = @_;

    $data[$i] = "..";
    return if $i < 1;

    $i--;
    $data[$i] =~ s/^\s*//;
    # $ret .= "<TITLE>$data[$i]</TITLE>\n" unless $title++;
    $data[$i] = ".. <$head>" . $data[$i] . "</$head>";
}

1;